# Load packages -----------------------------------
library(tidyverse)
library(ggpubr)
library(dplyr)
library(Seurat)
library(ggplot2)
library(paletteer)
library(readxl)
library(writexl)
library(qs)
library(ggrastr)
library(cowplot)
library(openxlsx)
library(patchwork)
library(SingleR)
library(celldex)
# Organize environment -----------------------------------
base_dir <- "/Users/sdaniell/Dropbox (Partners HealthCare)/Sara Danielli/Project/Ependymoma/11 - Plots scRNAseq"
data_dir <- file.path(base_dir, "data")
analysis_dir <- file.path(base_dir, 'analysis')
plot_dir <- file.path(analysis_dir, "plots/STEPN/malignant")
if (!dir.exists(plot_dir)){dir.create(plot_dir, recursive = T)}
ref_dir <- file.path('/Users/sdaniell/Dropbox (Partners HealthCare)/Sara Danielli/Project/Developmental datasets/data/Nowakowski_Eze')
resource_dir <- file.path(base_dir, "scripts/resources")
source(file.path(resource_dir, "Plot_style_v2.R"))
source(file.path(resource_dir, "NMF_helper_function.R"))
# Define color palettes -----------------------------------
col_subtype = c('ZFTA-RELA' = "#B44672",'ZFTA-Cluster 1' = "#B47846", 'ZFTA-Cluster 2' = "#46B478",
'ZFTA-Cluster 3' = "#46B4AF", 'ZFTA-Cluster 4' = "#4682B4",'ST-YAP1' = "#B4AF46")
colors_metaprograms<- c("gray30","#F99E93FF","#9E5E9BFF","#74ADD1FF",'#0F4F8B', "#ACD39EFF","#96410EFF", 'mistyrose1')
names(colors_metaprograms) <- c("Cycling", "Neuroepithelial-like", "Radial-glia-like",
"Embryonic-neuronal-like", "Neuronal-like" ,"Ependymal-like", "MES-like", "Embryonic-like")
# load in data -----------------------------------
# load seurat objects
seurat_obj_mal <- qread(file = file.path(base_dir, "data/patient/seurat_obj_ST_combined_malig_annotated.qs"))
seurat_obj_mal$Metaprogram <- factor(x = seurat_obj_mal$Metaprogram, levels = c("Cycling", "Embryonic-like", "Neuroepithelial-like", "Radial-glia-like",
"Embryonic-neuronal-like", "Neuronal-like" ,"Ependymal-like", "MES-like"))
seurat_obj_mal$Subtype <- factor(x = seurat_obj_mal$Subtype, levels = c("ZFTA-RELA" ,"ZFTA-Cluster 1", "ZFTA-Cluster 2", "ZFTA-Cluster 3", "ZFTA-Cluster 4", "ST-YAP1"))
# load NMF gene list
markers_NMF <- readRDS(file.path(base_dir, 'data/signatures/nmf_marker_genes_final_annotated.rds'))
names(markers_NMF) <- c("Neuroepithelial-like", "MES-like","Ependymal-like","Radial-glia-like","Cycling", "Embryonic-neuronal-like", "Neuronal-like", "Embryonic-like")
markers_NMF_200 <- readRDS(file.path(base_dir, 'data/signatures/nmf_marker_genes_final_annotated_200.rds'))
write.xlsx(as.data.frame(markers_NMF), file.path(plot_dir, "1_NMF_marker_genes_top200.xlsx"))
# select top 30
markers_NMF_30 <- lapply(markers_NMF, head, 30)
# Find Markers -----------------------------------
Idents(seurat_obj_mal) <- 'Metaprogram'
markers <- FindAllMarkers(seurat_obj_mal,
logfc.threshold = 0.5,
min.pct = 0.15,
only.pos = T)
FALSE Calculating cluster Cycling
FALSE Calculating cluster Embryonic-like
FALSE Calculating cluster Neuroepithelial-like
FALSE Calculating cluster Radial-glia-like
FALSE Calculating cluster Embryonic-neuronal-like
FALSE Calculating cluster Neuronal-like
FALSE Calculating cluster Ependymal-like
FALSE Calculating cluster MES-like
markers <- markers %>%
filter(p_val_adj < 0.05)
write_xlsx(markers, file.path(plot_dir, '1_marker_genes.xlsx'))
seurat_obj_mal_table <- seurat_obj_mal@meta.data %>%
group_by(Sample_deID, sample, ) %>%
summarise(nCount_RNA = mean(nCount_RNA),
nFeature_RNA = mean(nFeature_RNA),
n = n())
FALSE `summarise()` has grouped output by 'Sample_deID'. You can override using the
FALSE `.groups` argument.
seurat_obj_mal_table
write.xlsx(seurat_obj_mal_table, file.path(plot_dir, "0_scRNAseq_details_STEPN.xlsx"))
DimPlot(object = seurat_obj_mal,
reduction = 'umap.harmony',
group.by = "Subtype",
pt.size = 0.5,
shuffle = TRUE,
label = TRUE,
label.size = 0,
cols = col_subtype
) +
labs(title = 'ST-EPN', subtitle = 'n = 6,933 cells') +
theme_tSNE
ggsave(file.path(plot_dir, "1_umap_harmony_Subtype.pdf"), width=5, height=4)
DimPlot(object = seurat_obj_mal,
reduction = 'umap.harmony',
group.by = "Metaprogram",
pt.size = 0.5,
shuffle = TRUE,
label = TRUE,
label.size = 0,
cols = colors_metaprograms
) +
labs(title = 'ST-EPN', subtitle = 'n = 6,933 cells') +
theme_tSNE
ggsave(file.path(plot_dir, "1_umap_harmony_metaprogram.pdf"), width=6, height=4)
## UMAP By sample
DimPlot(object = seurat_obj_mal, reduction = 'umap.harmony', group.by = "Sample_deID", label = FALSE, pt.size = 0.5, shuffle = TRUE, label.size = 0, cols = col_Sample_deID)+
labs(title = 'ST-EPN', subtitle = 'n = 6,933 cells') +
theme_tSNE
ggsave(file.path(plot_dir, '1_umap_harmony_metaprogram_sampleID.pdf'), width = 7.5, height = 4)
DimPlot(object = seurat_obj_mal,
reduction = 'umap.unintegrated',
group.by = "Subtype",
pt.size = 0.5,
shuffle = TRUE,
label = TRUE,
label.size = 4,
cols = col_subtype
) +
labs(title = 'ST-EPN', subtitle = 'n = 6,933 cells') +
theme_tSNE
ggsave(file.path(plot_dir, "2_umap_unintegrated_Subtype.pdf"), width=5.5, height=4)
## Dotplot markers
Idents(seurat_obj_mal) <- 'type'
DotPlot(seurat_obj_mal,
features = c('FOXJ1', 'TTR'),
dot.scale = 6,
col.min = 0,
#cols = c("grey90", "red3"),
scale = F, assay = 'RNA'
) + RotatedAxis() +
paletteer::scale_colour_paletteer_c("viridis::mako", direction = -1)
FALSE Scale for colour is already present.
FALSE Adding another scale for colour, which will replace the existing scale.
ggsave(file.path(plot_dir, "Dotpot_FOXJ1_TTR.pdf"), width=4, height=2)
## Bar plot
plot_bar(seurat_obj_mal, seurat_obj_mal$Sample_deID, seurat_obj_mal$Metaprogram, col = colors_metaprograms)
ggsave(file.path(plot_dir, "3_Barplot_sample_combined.pdf"), width=12, height=4)
## Bar plot
plot_bar(seurat_obj_mal, seurat_obj_mal$Subtype, seurat_obj_mal$Metaprogram, col = colors_metaprograms)
ggsave(file.path(plot_dir, "4_Barplot_Subtype_combined.pdf"), width=6, height=4)
# Score ST-EPN cells for neurodevelopmental genes
scores <- c(markers_NMF_30['Neuroepithelial-like'], markers_NMF_30['Ependymal-like'], markers_NMF_30['Neuronal-like'], markers_NMF_30['Embryonic-like'], markers_NMF_30['Embryonic-neuronal-like'])
names(scores) <- c('Neuroepithelial-like', 'Ependymal-like', 'Neuronal-like', 'Embryonic-like', 'Embryonic-neuronal-like')
seurat_obj_mal <- AddModuleScore(object = seurat_obj_mal, assay = 'RNA', features = scores, name = names(scores), seed=5)
# rename metadata names of scores
# identify number of first column with metadata scores
col_start <- length(colnames(seurat_obj_mal@meta.data)) - length(names(scores)) +1
# identify number of last column with metadata scores
col_end <- length(colnames(seurat_obj_mal@meta.data))
# rename columns with score name
colnames(seurat_obj_mal@meta.data)[col_start:col_end] <- names(scores)
x1 = "Neuroepithelial-like"
x2 = "Embryonic-like"
y1 = "Neuronal-like"
y2 = "Ependymal-like"
group.by="Metaprogram"
sample=seurat_obj_mal
variables_to_retrieve <- c(x1, x2, y1,y2)
# And store them as a tibble.
scores <- sample@meta.data[, variables_to_retrieve]
# Shuffle the cells so that we accomplish a random plotting, not sample by sample.
# Compute Y axis values.
d <- apply(scores, 1, function(x){max(x[c(x1, x2)]) - max(x[c(y1, y2)])})
# Compute X axis values.
x <- vapply(seq_along(d), function(x) {
if (d[x] > 0) {
d <- log2(abs(scores[x, x1] - scores[x, x2]) + 1)
ifelse(scores[x, x1] < scores[x, x2], d, -d)
} else {
d <- log2(abs(scores[x, y1] - scores[x, y2]) + 1)
ifelse(scores[x, y1] < scores[x, y2], d, -d)
}
}, FUN.VALUE = numeric(1))
names(x) <- rownames(scores)
# Plot.
df_E <- data.frame(row.names = rownames(scores))
df_E[["set_x"]] <- x
df_E[["set_y"]] <- d
df_E[["group.by"]] <- sample@meta.data[, group.by]
df_E[["subtype"]] <- seurat_obj_mal$Subtype
colors_metaprograms<- c("gray30","#F99E93FF","#9E5E9BFF","#74ADD1FF",'#0F4F8B', "#ACD39EFF","#96410EFF", 'mistyrose1')
names(colors_metaprograms) <- c("Cycling", "Neuroepithelial-like", "Radial-glia-like",
"Embryonic/neuronal-like", "Neuronal-like" ,"Ependymal-like", "MES-like", "Embryonic-like")
ggplot(df_E,aes(x=set_x,
y=set_y,
col=group.by))+
geom_point(size=0.5)+
scale_color_manual(values=colors_metaprograms)+
theme_cellstate_plot +
labs(x = 'Relative meta-module score', y = 'Relative meta-module score', title = 'ST-EPN cellular hierarchy', subtitle = 'n = 6,933 cells') +
geom_vline(xintercept = 0, linetype="dotted") + geom_hline(yintercept = 0, linetype="dotted")
ggsave(file.path(plot_dir, "6_CellState_plot_4way_Metaprograms.pdf"), width=5.5, height=4)
x1 = "Neuroepithelial-like"
x2 = "Embryonic-like"
y1 = "Neuronal-like"
y2 = "Ependymal-like"
group.by="Subtype"
sample=seurat_obj_mal
variables_to_retrieve <- c(x1, x2, y1,y2)
# And store them as a tibble.
scores <- sample@meta.data[, variables_to_retrieve]
# Shuffle the cells so that we accomplish a random plotting, not sample by sample.
# Compute Y axis values.
d <- apply(scores, 1, function(x){max(x[c(x1, x2)]) - max(x[c(y1, y2)])})
# Compute X axis values.
x <- vapply(seq_along(d), function(x) {
if (d[x] > 0) {
d <- log2(abs(scores[x, x1] - scores[x, x2]) + 1)
ifelse(scores[x, x1] < scores[x, x2], d, -d)
} else {
d <- log2(abs(scores[x, y1] - scores[x, y2]) + 1)
ifelse(scores[x, y1] < scores[x, y2], d, -d)
}
}, FUN.VALUE = numeric(1))
names(x) <- rownames(scores)
# Define titles for the axis.
x_lab1 <- paste0(y1, " <----> ", y2)
x_lab2 <- paste0(x1, " <----> ", x2)
y_lab1 <- paste0(y1, " <----> ", x1)
y_lab2 <- paste0(x2, " <----> ", y2)
# Plot.
df_E <- data.frame(row.names = rownames(scores))
df_E[["set_x"]] <- x
df_E[["set_y"]] <- d
df_E[["group.by"]] <- sample@meta.data[, group.by]
df_E[["subtype"]] <- seurat_obj_mal$Subtype
colors.use=c("#B44672","#B47846","#46B478","#46B4AF","#4682B4","#B4AF46")
names(colors.use) = c("ZFTA-RELA","ZFTA-Cluster 1","ZFTA-Cluster 2","ZFTA-Cluster 3","ZFTA-Cluster 4","ST-YAP1")
samples = unique(df_E$group.by)
samples = c("ZFTA-RELA","ZFTA-Cluster 1","ZFTA-Cluster 2","ZFTA-Cluster 3","ZFTA-Cluster 4", "ST-YAP1")
for (i in 1:length(samples)) {
tmp <- df_E$group.by==samples[i]
tmp <- gsub("TRUE",samples[i],tmp)
tmp <- gsub("FALSE","Other",tmp)
df_E <- cbind(df_E,tmp)
}
subtypes <- c("ZFTA-RELA","ZFTA-Cluster 1","ZFTA-Cluster 2","ZFTA-Cluster 3","ZFTA-Cluster 4", "ST-YAP1")
colnames(df_E)[5:10] <- subtypes
ggplot(df_E,aes(x=set_x,
y=set_y,
col=group.by))+
geom_point(size=0.5)+
scale_color_manual(values=colors.use)+
theme_cellstate_plot +
labs(x = 'Relative meta-module score', y = 'Relative meta-module score', title = 'ST-EPN cellular hierarchy', subtitle = 'n = 6,933 cells') +
geom_vline(xintercept = 0, linetype="dotted") + geom_hline(yintercept = 0, linetype="dotted")
ggsave(file.path(plot_dir, "6_CellState_plot_4way_subtype.pdf"), width=5.2, height=4)
Split by subtype
p1 <- ggplot(df_E %>%
arrange(`ZFTA-RELA`))+
geom_point(aes(x=set_x, y=set_y,color=`ZFTA-RELA`),size=1)+
scale_colour_manual(values=c('grey90', "#B44672"), name="")+
theme_cellstate_plot +
labs( title = 'ZFTA-RELA') +
geom_vline(xintercept = 0, linetype="dotted") + geom_hline(yintercept = 0, linetype="dotted") +
#scale_x_continuous(limits = c(-0.7, 1)) +
#scale_y_continuous(limits = c(-1.2, 1.5)) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank()) +
theme(legend.direction="horizontal",legend.position="bottom")
p2 <- ggplot(df_E %>%
arrange(`ZFTA-Cluster 1`))+
geom_point(aes(x=set_x, y=set_y,color=`ZFTA-Cluster 1`),size=1)+
scale_colour_manual(values=c('grey90', "#B47846"), name="")+
theme_cellstate_plot +
labs(title = 'ZFTA-Cluster 1') +
geom_vline(xintercept = 0, linetype="dotted") + geom_hline(yintercept = 0, linetype="dotted") +
#scale_x_continuous(limits = c(-0.7, 1)) +
#scale_y_continuous(limits = c(-1.2, 1.5)) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank())+
theme(legend.direction="horizontal",legend.position="bottom")
p3 <- ggplot(df_E %>%
arrange(`ZFTA-Cluster 2`))+
geom_point(aes(x=set_x, y=set_y,color=`ZFTA-Cluster 2`),size=1)+
scale_colour_manual(values=c('grey90', "#46B478"), name="")+
theme_cellstate_plot +
labs(title = 'ZFTA-Cluster 2') +
geom_vline(xintercept = 0, linetype="dotted") + geom_hline(yintercept = 0, linetype="dotted") +
#scale_x_continuous(limits = c(-0.7, 1)) +
#scale_y_continuous(limits = c(-1.2, 1.5)) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank())+
theme(legend.direction="horizontal",legend.position="bottom")
p4 <- ggplot(df_E %>%
arrange(`ZFTA-Cluster 3`))+
geom_point(aes(x=set_x, y=set_y,color=`ZFTA-Cluster 3`),size=1)+
scale_colour_manual(values=c('grey90', "#46B4AF"), name="")+
theme_cellstate_plot +
labs(title = 'ZFTA-Cluster 3') +
geom_vline(xintercept = 0, linetype="dotted") +
#scale_x_continuous(limits = c(-0.7, 1)) +
#scale_y_continuous(limits = c(-1.2, 1.5)) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank())+
theme(legend.direction="horizontal",legend.position="bottom")
p5 <- ggplot(df_E %>%
arrange(`ZFTA-Cluster 4`))+
geom_point(aes(x=set_x, y=set_y,color=`ZFTA-Cluster 4`),size=1)+
scale_colour_manual(values=c('grey90', "#4682B4"), name="")+
theme_cellstate_plot +
labs(title = 'ZFTA-Cluster 4') +
geom_vline(xintercept = 0, linetype="dotted") + geom_hline(yintercept = 0, linetype="dotted") +
#scale_x_continuous(limits = c(-0.6, 1)) +
#scale_y_continuous(limits = c(-1.2, 1.5)) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank())+
theme(legend.direction="horizontal",legend.position="bottom")
p6 <- ggplot(df_E %>%
arrange(`ST-YAP1`))+
geom_point(aes(x=set_x, y=set_y,color=`ST-YAP1`),size=1)+
scale_colour_manual(values=c('grey90', "#B4AF46"), name="")+
theme_cellstate_plot +
labs(title = 'ST-YAP1') +
geom_vline(xintercept = 0, linetype="dotted") + geom_hline(yintercept = 0, linetype="dotted") +
#scale_x_continuous(limits = c(-0.7, 1)) +
#scale_y_continuous(limits = c(-1.2, 1.5)) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank())+
theme(legend.direction="horizontal",legend.position="bottom")
wrap_plots(p1, p2, p3, p4, p5, p6)
ggsave(file.path(plot_dir, "6_CellState_plot_4way_Subtype_split.pdf"), width=10, height=8)
Based on Nowakowski scores
Idents(object = seurat_obj_mal) <- "Sample_deID"
seurat_obj.pseudobulk <- AverageExpression(seurat_obj_mal, group.by = "Sample_deID", assays = "RNA", return.seurat = TRUE, verbose = TRUE)
FALSE As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.
FALSE Centering and scaling data matrix
FALSE
FALSE This message is displayed once per session.
# Add metadata
# extract metadata information from seurat object
subtype_info <- seurat_obj_mal@meta.data %>%
group_by(Sample_deID, Subtype) %>%
summarize()
FALSE `summarise()` has grouped output by 'Sample_deID'. You can override using the
FALSE `.groups` argument.
subtype_info
seurat_obj.pseudobulk <- AddMetaData(seurat_obj.pseudobulk, subtype_info)
# Score each sample for cell states
scores <- c(markers_NMF_30['Neuroepithelial-like'], markers_NMF_30['Ependymal-like'], markers_NMF_30['Neuronal-like'], markers_NMF_30['Embryonic-like'], markers_NMF_30['Embryonic-neuronal-like'])
names(scores) <- c('Neuroepithelial-like', 'Ependymal-like', 'Neuronal-like', 'Embryonic-like', 'Embryonic-neuronal-like')
seurat_obj.pseudobulk <- AddModuleScore(object = seurat_obj.pseudobulk, assay = 'RNA', features = scores, name = names(scores), seed=5)
# rename metadata names of scores
# identify number of first column with metadata scores
col_start <- length(colnames(seurat_obj.pseudobulk@meta.data)) - length(names(scores)) +1
# identify number of last column with metadata scores
col_end <- length(colnames(seurat_obj.pseudobulk@meta.data))
# rename columns with score name
colnames(seurat_obj.pseudobulk@meta.data)[col_start:col_end] <- names(scores)
Plot hierachy plot of pseudo-bulk expression
x1 = "Neuroepithelial-like"
x2 = "Embryonic-like"
y1 = "Neuronal-like"
y2 = "Ependymal-like"
group.by="Subtype"
sample=seurat_obj.pseudobulk
variables_to_retrieve <- c(x1, x2, y1,y2)
# And store them as a tibble.
scores <- sample@meta.data[, variables_to_retrieve]
# Shuffle the cells so that we accomplish a random plotting, not sample by sample.
# Compute Y axis values.
d <- apply(scores, 1, function(x){max(x[c(x1, x2)]) - max(x[c(y1, y2)])})
# Compute X axis values.
x <- vapply(seq_along(d), function(x) {
if (d[x] > 0) {
d <- log2(abs(scores[x, x1] - scores[x, x2]) + 1)
ifelse(scores[x, x1] < scores[x, x2], d, -d)
} else {
d <- log2(abs(scores[x, y1] - scores[x, y2]) + 1)
ifelse(scores[x, y1] < scores[x, y2], d, -d)
}
}, FUN.VALUE = numeric(1))
names(x) <- rownames(scores)
# Define titles for the axis.
x_lab1 <- paste0(y1, " <----> ", y2)
x_lab2 <- paste0(x1, " <----> ", x2)
y_lab1 <- paste0(y1, " <----> ", x1)
y_lab2 <- paste0(x2, " <----> ", y2)
# Plot.
df_E <- data.frame(row.names = rownames(scores))
df_E[["set_x"]] <- x
df_E[["set_y"]] <- d
df_E[["group.by"]] <- sample@meta.data[, group.by]
df_E[["subtype"]] <- seurat_obj.pseudobulk$Subtype
colors.use=c("#B44672","#B47846","#46B478","#46B4AF","#4682B4","#B4AF46")
names(colors.use) = c("ZFTA-RELA","ZFTA-Cluster 1","ZFTA-Cluster 2","ZFTA-Cluster 3","ZFTA-Cluster 4","ST-YAP1")
samples = unique(df_E$group.by)
samples = c("ZFTA-RELA","ZFTA-Cluster 1","ZFTA-Cluster 2","ZFTA-Cluster 3","ZFTA-Cluster 4", "ST-YAP1")
for (i in 1:length(samples)) {
tmp <- df_E$group.by==samples[i]
tmp <- gsub("TRUE",samples[i],tmp)
tmp <- gsub("FALSE","Other",tmp)
df_E <- cbind(df_E,tmp)
}
subtypes <- c("ZFTA-RELA","ZFTA-Cluster 1","ZFTA-Cluster 2","ZFTA-Cluster 3","ZFTA-Cluster 4", "ST-YAP1")
colnames(df_E)[5:10] <- subtypes
ggplot(df_E,aes(x=set_x,
y=set_y,
col=group.by))+
geom_point(size=3)+
scale_color_manual(values=colors.use)+
theme_cellstate_plot +
labs(x = 'Relative meta-module score', y = 'Relative meta-module score', title = 'ST-EPN cellular hierarchy', subtitle = 'n = 6,933 cells') +
geom_vline(xintercept = 0, linetype="dotted") + geom_hline(yintercept = 0, linetype="dotted")
ggsave(file.path(plot_dir, "8_CellState_plot_4way_pseudobulk.pdf"), width=5.5, height=4)